In [1]:
import missingno as msno
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import pickle
import pprint
from sklearn.ensemble import RandomForestRegressor
from pandas_profiling import ProfileReport
from dateutil import relativedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from statsmodels.regression.linear_model import OLS

def new_line():
    """Print a dashed separator with an empty line before and after it."""
    print("", "-------------------------", "", sep="\n")

def RMSE(predictions, actual=None):
    """Return the root-mean-squared error, rounded to the nearest integer.

    Parameters
    ----------
    predictions : array-like
        Predicted target values.
    actual : array-like, optional
        True target values. When omitted, the module-level ``test_y`` is used,
        so existing ``RMSE(predictions)`` calls keep working.
    """
    if actual is None:
        actual = test_y
    return round(np.sqrt(((actual - predictions) ** 2).mean()))

def plot_numerical_columns(col_name):
    """Show four diagnostic plots for the numerical column ``col_name`` of ``df``.

    The plots are: a histogram, the values in row order, the values sorted
    ascending, and a box plot. The first three carry a red mean marker and a
    green median marker.
    """
    values = df[col_name]
    mean_value = values.mean()
    median_value = values.median()

    def _mark_center_and_show(title, values_on_x_axis=False):
        # The markers must cross the axis that carries the column's values.
        draw_marker = plt.axvline if values_on_x_axis else plt.axhline
        plt.title(title, size=18)
        draw_marker(mean_value, color='red', label='Mean')
        draw_marker(median_value, color='green', label='Median')
        # Explicit labels keep the legend right regardless of artist order.
        plt.legend()
        plt.show()

    # Histogram: values are on the x-axis (y is a frequency), so the markers are vertical.
    values.plot(kind="hist", figsize=(13,8), label='Actual')
    _mark_center_and_show(col_name, values_on_x_axis=True)

    # Values in row order
    values.plot(figsize=(13,8), label='Actual')
    _mark_center_and_show(col_name)

    # Values sorted ascending, plotted against their rank
    values.sort_values().reset_index(drop=True).plot(figsize=(13,8), label='Actual')
    _mark_center_and_show(col_name + " (SORTED)")

    # Box plot
    values.plot(kind="box", figsize=(13,8))
    plt.title(col_name, size=18)
    plt.xlabel("")
    plt.show()

def plot_date_columns(col_name):
    """Show five plots for the datetime column ``col_name`` of ``df``.

    Two line plots (row order, then sorted) are followed by bar charts of the
    percentage of rows per year, per month and per day of month.
    """
    dates = df[col_name]
    n_rows = len(df)
    frame = {"figsize": (15,7), "grid": True}

    def _label_and_show(x_label, y_label, title):
        plt.xlabel(x_label, size=14)
        plt.ylabel(y_label, size=14)
        plt.title(title, size=18)
        plt.show()

    dates.plot(**frame)
    _label_and_show("Index", "Date", col_name + " Graph")

    dates.sort_values().reset_index(drop=True).plot(**frame)
    _label_and_show("Index (sorted)", "Year", col_name + " Graph")

    # (x-axis label, extracted date part, title suffix) for each frequency chart
    frequency_charts = (
        ("Year", dates.dt.year, " year Frequency Graph"),
        ("Month", dates.dt.month, " month Frequency Graph"),
        ("Day", dates.dt.day, " Day Frequency Graph"),
    )
    for x_label, part, title_suffix in frequency_charts:
        share = part.value_counts().sort_index() / n_rows * 100
        share.plot(kind="bar", **frame)
        _label_and_show(x_label, "Ratio (1-100)", col_name + title_suffix)

def plot_catagorical_columns(cat_variable):
    """Draw a bar chart of each category's share (in percent of all rows of ``df``)."""
    share = df[cat_variable].value_counts() / len(df) * 100
    share.plot.bar(figsize=(15,6), grid=True)
    red = {"color": 'r'}
    plt.title(cat_variable, size=18, **red)
    plt.xlabel("Catagory", size=14, **red)
    plt.ylabel("Ratio (1-100)", size=14, **red)
    plt.show()

def data_shape():
    """Return a short text stating the row and column counts of the global ``df``."""
    n_rows, n_cols = df.shape[:2]
    return f"The Data have:\n\t{n_rows} rows\n\t{n_cols} columns\n"
#===
# Alternative inputs kept for reference:
# df = pd.read_csv("data.csv")

# df = pd.read_csv("df_only_selected_columns_using_PCA.csv")
# target_variable = "ACTUAL_WORTH"
# df = pd.concat([
#         df.select_dtypes("number").iloc[:, :3],
#         df.select_dtypes("O").iloc[:, :3],
#         df.select_dtypes(exclude=["number", "O"]),
#         df[[target_variable]]], axis=1)
# target_variable = "AREA_NAME_EN"

# `date_parser` expects a callable and only takes effect together with
# `parse_dates`; the former `date_parser=True` did nothing, so it is omitted.
# Date-like columns are detected later by DTYPES().
df = pd.read_csv("cleaned_data.csv")
target_variable = "SalePrice"
#===
# Rows without a target value cannot be used for modeling, so they are removed.
f = df[target_variable].isna().sum()
if f:
    new_line()
    print(f"There are {f} NAs in target values, we droped those rows")
    df = df[df[target_variable].notna()]
del f
#---------------------------------------------------
# df.select_dtypes("O").columns[:5]
# D = df.select_dtypes(exclude="O")
# D2 = df.select_dtypes("O").iloc[:,:5]
# df = pd.concat([D, D2], axis=1)

# profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)
# profile.to_file("your_report.html")
#---------------------------------------------------
new_line()
print(data_shape())
#===
new_line()
print(f"Columns types distribution:\n\n{df.dtypes.value_counts()}")
#---------------------------------------- NA
# `a` holds, per column that has at least one missing value, the number of
# missing values. It is reused by the statements that follow this section.
a = df.isna().sum().where(lambda x:x>0).dropna()
if a.size:
    new_line()
    print(f"There are {len(a)} (out of {df.shape[1]}, [{round(len(a)/df.shape[1]*100)}%]) columns that contains 1 or more missing values")
    # Keep the information "this value was missing" as a categorical feature.
    for i in a.index:
        df[i+"_NA_indicator"] = df[i].isna().map({True : "Missing", False : "Not missing"})
    new_line()
    print(f"{a.size} NA_indicator variables added to the data\n")


    print("========= NA Graphs =========\n")
    msno.matrix(df)
    plt.title("NA Graph")
    plt.show()

    new_line()
    sns.heatmap(df.isnull(), cbar=False)
    plt.title("NA Graph")
    plt.show()
#===
# Turn the missing counts into percentages of rows, smallest first.
a = a.sort_values() / len(df) * 100
entirely_missing = a[a == 100].index
if len(entirely_missing) > 0:
    new_line()
    df.drop(columns=entirely_missing, inplace=True)
    print(f"There are {len(entirely_missing)} columns that are all Missing values, so we droped those.\nNow {data_shape()}\n\nDropped columns names:")
    for i in entirely_missing:
        print("\t",i)
    a = a[a != 100]
del entirely_missing
#===
# dtype distribution of the columns that still contain missing values
na_dtype_counts = df[a.index].dtypes.value_counts()
if na_dtype_counts.size:
    new_line()
    print(f"NA columns data type Distribution:\n\n{na_dtype_counts}")
del na_dtype_counts
#===
new_line()
print(f"NaN Ratio (0-100)\n\n{a}" if a.size else "Now There is no NaN value in our Data")
#===
# Fill missing numerical values with a 4-nearest-neighbours imputer.
if df.select_dtypes("number").isna().sum().sum():
    new_line()
    print(f'(Before Missing values treatment)\nThere are {df.isna().sum().sum()} Missing values:\n\t{df.select_dtypes("O").isna().sum().sum()} in catagorical variables\n\t{df.select_dtypes("number").isna().sum().sum()} in numerical columns\n\t{df.select_dtypes(exclude=["O", "number"]).isna().sum().sum()} in others')

    from sklearn.impute import KNNImputer
    numeric_part = df.select_dtypes("number")
    df_not_a_number  = df.select_dtypes(exclude="number")
    imputer = KNNImputer(n_neighbors=4, weights="uniform")
    imputed = imputer.fit_transform(numeric_part)
    # The imputer returns a bare array. Re-attach the ORIGINAL row index;
    # otherwise the frame gets a fresh 0..n-1 index and the concat below pairs
    # numeric and non-numeric values of different rows (the index is no longer
    # contiguous once rows have been filtered out).
    df = pd.DataFrame(imputed, columns=numeric_part.columns, index=numeric_part.index)
    df = pd.concat([df, df_not_a_number], axis=1)
    del df_not_a_number
    del numeric_part

    print(f'\n(After filling numeric missing values)\nThere are {df.isna().sum().sum()} Missing values:\n\t{df.select_dtypes("O").isna().sum().sum()} in catagorical variables\n\t{df.select_dtypes("number").isna().sum().sum()} in numerical columns\n\t{df.select_dtypes(exclude=["O", "number"]).isna().sum().sum()} in others')
#===
# --------------------------------------------------------- Unique values
# Columns with a single distinct value carry no information.
only_one_unique_value = df.nunique().where(lambda x:x == 1).dropna()
if only_one_unique_value.size:
    new_line()
    df.drop(columns=only_one_unique_value.index, inplace=True)
    print(f"There are {only_one_unique_value.size} variables That have only one unique value, so we drop those.\n\nNow {data_shape()}\n\nThose columns names in order:\n")
    for i in only_one_unique_value.index.sort_values():
        print(i)
del only_one_unique_value
# #===
# Columns in which no value repeats (identifiers such as an Id) are dropped.
# The target is exempt: a continuous target can legitimately consist of
# distinct values, and dropping it would break every later use of
# df[target_variable].
# NOTE(review): the rule also removes continuous numerical features whose
# values happen to be all distinct - confirm this is intended.
all_values_are_unique = (
    df.apply(lambda x:x.is_unique)
    .where(lambda x:x==True)
    .dropna()
    .drop(labels=target_variable, errors="ignore")
)
if all_values_are_unique.size:
    new_line()
    df.drop(columns=all_values_are_unique.index, inplace=True)
    print(f"There are {all_values_are_unique.size} column/s that have all unique values, so no value repeatation, we droped those columns.\n\nNow {data_shape()}\nThose column/s name/s are:\n")
    for i in all_values_are_unique.index:
        print("\t", i)
del all_values_are_unique
#===
date_columns = []
def DTYPES():
    """Classify the columns of the global ``df`` as Object, Number or Date.

    Every object column is tentatively parsed with ``pd.to_datetime``; a column
    that parses without error is converted in place inside ``df`` and recorded
    in the module-level ``date_columns`` (a ``pd.Index`` when at least one date
    column was found, otherwise an empty list). Columns that end up listed more
    than once, or not at all, are reported on stdout.

    Returns
    -------
    pandas.DataFrame
        One row per classified column with the fields ``Column`` and ``dtype``
        (``'Object'``, ``'Number'`` or ``'Date'``).
    """
    global date_columns
    catagorical_columns = df.head().select_dtypes("O").columns
    numerical_columns   = df.head().select_dtypes("number").columns
    date_columns        = []

    for i in catagorical_columns:
        try:
            df[i] = pd.to_datetime(df[i])
        except Exception:
            # Best effort: a column that cannot be parsed simply is not a date.
            # `Exception` (rather than a bare except) lets Ctrl-C through.
            continue
        date_columns.append(i)

    catagorical_columns = catagorical_columns.drop(date_columns)
    if date_columns:
        date_columns = pd.Index(date_columns)

    classified = catagorical_columns.append(numerical_columns).append(date_columns)
    dtypes = pd.DataFrame({"Column" : classified,
                "dtype" : ['Object']*len(catagorical_columns) + ['Number']*len(numerical_columns) + ['Date']*len(date_columns)})
    #===
    if not classified.is_unique:
        new_line()
        print("Some column/s repated in > 1 dtypes\n")
        # keep=False lists every occurrence of a repeated column name
        print(dtypes[dtypes.Column.duplicated(keep=False)].to_string())
    #===
    x = df.columns.difference(classified)
    if x.size:
        new_line()
        print("Some columns not included in any existing catagory, those:\n")
        for i in x:
            print(f"\t<{i}>, with dtype of <{df[i].dtype}>")
    #===
    return dtypes
#===
dtypes = DTYPES()
# ----------------------------------------------------------------------- Feature enginearing
# ======= Adding date columns
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> add polynomial, sqrt, tree, log features
def add_new_date_cols(x, suffix):
    """Derive calendar features from the datetime Series ``x`` and store them in ``df``.

    ``suffix`` is placed in front of every new column name. A candidate feature
    is only written to the global ``df`` when it has more than one distinct
    value. The return value is ``df`` without the source column ``x.name``.

    NOTE(review): ``Series.dt.week`` is reported as deprecated by newer pandas
    releases - confirm against the pandas version this notebook runs on.
    """
    def _quoted(numbers):
        # NaN stays NaN; other numbers lose a trailing ".0" and are wrapped in
        # double quotes so the result is an explicit string category.
        as_text = numbers.apply(lambda v: np.nan if np.isnan(v) else str(v).replace(".0", ""))
        return '"' + as_text + '"'

    week = x.dt.week
    year = x.dt.year
    hour = x.dt.hour
    elapsed = x - x.min()

    candidates = {
        suffix + '_week_normalized': week / 52,
        suffix + '_week_str': _quoted(week),
        suffix + '_year_after_min_year': year - year.min(),
        suffix + '_year_str': _quoted(year),
        suffix + '_day_name': x.dt.day_name(),
        # first token of the timedelta text, i.e. the whole-day count
        suffix + '_day_after_min_date_str': '"' + elapsed.apply(lambda delta: str(delta).split()[0]) + '"',
        suffix + '_day_normalized': x.dt.day / 31,
        suffix + '_hour_normalized': hour / 24,
        suffix + '_hour_str': _quoted(hour),
        suffix + '_month_name': x.dt.month_name(),
        suffix + '_month_normalized': x.dt.month/12,
    }
    for feature_name, feature in candidates.items():
        if feature.nunique() > 1:
            df[feature_name] = feature
    return df.drop(columns=x.name)

# Replace every detected date column by its derived calendar features.
len_df_before_adding_date_vars = df.shape[1]
for date_col in date_columns:
    df = add_new_date_cols(df[date_col], date_col)
len_df_after_adding_date_vars = df.shape[1]
if len_df_after_adding_date_vars - len_df_before_adding_date_vars > 0:
    new_line()
    print(f"Added {len_df_after_adding_date_vars - len_df_before_adding_date_vars} date Features")
# ======= Numerical columns whose distinct values are fewer than 4% of the rows
# ======= additionally get a quoted-string (categorical) twin named <column>_str
unique_share = df.select_dtypes("number").nunique() / len(df) * 100
f = unique_share[unique_share < 4].index
del unique_share
if len(f) > 0:
    # the two counters are reused here for the string features
    len_df_before_adding_date_vars = df.shape[1]
    for col_num_to_str in f:
        df[col_num_to_str+"_str"] = '"' + df[col_num_to_str].astype(str) + '"'
    len_df_after_adding_date_vars = df.shape[1]
    new_line()
    print(f"Added {len_df_after_adding_date_vars - len_df_before_adding_date_vars} String Features (Extracted from numerical variables)")
# =======
def cluping_rare_cases_in_one_catagory(x):
    """Merge the infrequent categories of column ``x`` of ``df`` into "Rare cases".

    Categories occurring fewer than 10 times are relabelled "Rare cases". When
    the resulting least frequent category still has fewer than 8 rows, those
    rows take the most common value of the column instead.

    Parameters
    ----------
    x : str
        Name of a column of the global ``df``.

    Returns
    -------
    pandas.Series or None
        The regrouped column, or ``None`` when only one distinct value is left;
        in that case the column is dropped from ``df`` in place.
    """
    x = df[x]
    counts = x.value_counts()
    rare_values = counts[counts < 10].index.to_list()
    x = x.replace(rare_values, "Rare cases")

    # value_counts() is sorted by frequency, so the last entry is the least
    # frequent category. Every category other than "Rare cases" has at least 10
    # rows, so a count below 8 can only belong to "Rare cases" itself.
    # `.iloc` makes the positional access explicit (the index holds labels).
    regrouped_counts = x.value_counts()
    if len(regrouped_counts) and regrouped_counts.iloc[-1] < 8:
        # Too few rare rows to form a category: give them the most common value.
        x[x == "Rare cases"] = x.mode()[0]
    if x.nunique() == 1:
        new_line()
        print(f"The column <{x.name}> have only one unique value, We droped it from the data.")
        df.drop(columns=x.name, inplace=True)
        return None
    return x

# Regroup the rare categories of every object column; columns for which the
# helper returns None have already been dropped inside it.
for var in df.select_dtypes("O").columns:
    m = cluping_rare_cases_in_one_catagory(var)
    if not isinstance(m, pd.Series):
        continue
    df[var] = m
new_line()


# Per column: how many rows ended up in the "Rare cases" category
rare_counts = (df == 'Rare cases').sum().sort_values()
rare_counts = rare_counts.where(rare_counts > 0).dropna()
xx = pd.DataFrame({"Count" : rare_counts,
                "Ratio" : (rare_counts/len(df)*100).round(4)})
del rare_counts
print(f"<Rare case> catagory:\n{xx.to_string()}")
# ----------------------------------------------------------------------- END (Feature enginearing)
# Columns were added and removed above, so the classification is rebuilt.
dtypes = DTYPES()
# ---------------------------------------------------- Correlation plot
new_line()
# absolute pairwise correlations of the numerical columns (reused further down)
cor_df = df.select_dtypes('number').corr().abs()
# hide the diagonal and the upper triangle: the matrix is symmetric
mask = np.triu(np.ones(cor_df.shape, dtype=bool))
f, ax = plt.subplots(figsize=(17, 10))
cmap = sns.color_palette("viridis", as_cmap=True)
heatmap_options = {
    "mask": mask,
    "cmap": cmap,
    "vmax": .3,
    "square": True,
    "linewidths": .5,
    "cbar_kws": {"shrink": .5},
}
plot_ = sns.heatmap(cor_df, **heatmap_options)
plot_.set_title("abs (Correlation) plot", fontsize=25)
plt.show()
# ---------------------------------------------------------------------
#===
# Per-column report: duplicate detection, drop of useless columns, then a
# summary table and plots that depend on the column kind assigned by DTYPES()
# ('Object', 'Number' or 'Date').
for column_name, type_ in zip(dtypes["Column"], dtypes["dtype"]):
    x = df[column_name]
    print(f"\n\n\n========================================= {column_name} =========================================\n\n")

    # A column is reported as a duplicate when its values map one-to-one onto
    # the values of another column.
    n_unique = x.nunique()
    for col_ in df.columns:
        if col_ == column_name:
            continue
        if df[col_].nunique() == n_unique:
            unique_combination = df[[col_, column_name]].drop_duplicates()
            if unique_combination.apply(lambda s: s.is_unique).sum() == 2:
                new_line()
                print(f"This Columns is duplicate of <{col_}> column")

    print(f"Column Type     : {type_}")
    if x.isna().all():
        new_line()
        df.drop(columns=column_name, inplace=True)
        print("We dropped This column, because it is all Empty")
        continue
    # The labels produced by DTYPES() are 'Object' and 'Date'.
    if type_ in ["Object", "Date"] and x.is_unique:
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(f"We dropped This column, because it's a {type_} columns, and it's all values are unique")
        continue
    if n_unique == 1:
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(f"We dropped This column, because There is only one unique value")
        continue

    if type_ == "Number":
        # correlations with every other numerical column, weakest first
        local_cor = cor_df[column_name].drop(column_name).reset_index()
        local_cor = local_cor.reindex(local_cor[column_name].abs().sort_values().index)
        # nothing to compare against when this is the only numerical column
        if not local_cor.empty:
            if local_cor[column_name].max() == 1:
                new_line()
                print(f"This column is perfactly correlated with column <{local_cor[local_cor[column_name] == 1]['index'].values[0]}>, so remove one of them")

            new_line()
            xm = local_cor[-3:].rename(columns={'index' : 'Column name', column_name : 'Correlation'}).reset_index(drop=True)
            xm.index = xm['Column name']
            xm.drop(columns="Column name", inplace=True)
            xm.plot(kind='barh', grid=True, figsize=(10,1.5))
            plt.title("Most 3 correlated features with this columns (sorted)", size=14)
            plt.xlabel("Correlation", size=12)
            plt.show()

        new_line()
        skewness = x.skew(skipna = True)
        if abs(skewness) < 0.5:
            print(f"The data is fairly symmetrical (skewness is: {skewness})")
        elif abs(skewness) < 1:
            print(f"The data are moderately skewed (skewness is: {skewness})")
        else:
            print(f"The data are highly skewed (skewness is: {skewness})\nNote: When skewness exceed |1| we called it highly skewed")

        # 'Outlies' counts the values more than 3 standard deviations from the mean.
        ff = [x.count(), x.isna().sum(), x.mean(), x.std(), x.min()]
        ff += x.quantile([.25,.5,.75]).to_list()
        ff += [x.max(), x.nunique(), (((x - x.mean())/x.std()).abs() > 3).sum(), (x < 0).sum(), (x == 0).sum()]

        f = pd.DataFrame(ff, index=['Count', 'NA', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max', 'Nunique', 'Outlies', 'Nagetive', 'Zeros'], columns=['Count'])
        f['Ratio'] = f.Count / x.count() * 100
        # a ratio is meaningless for the location/spread statistics
        f.loc['Mean' : 'Max', 'Ratio'] = None

        new_line()
        print(f.round(2).to_string())
        plot_numerical_columns(column_name)

    elif type_ == "Object":
        l = x.count(), x.nunique(), len(x), x.isna().sum(), (x == x.mode().values[0]).sum(), (x == x.value_counts().tail(1).index[0]).sum(), x.value_counts().where(lambda x:x==1).dropna().size
        f = pd.DataFrame(l, index=['Count', 'Nunique', 'Len', 'NA', 'Most frequent', 'Least frequent', 'Values occured only once'], columns=['Counts'])
        f['Ratio'] = (f.Counts / x.count() * 100).round(4)
        f.loc[['Len'], 'Ratio'] = None

        new_line()
        print(f.to_string())


        # Categories that differ only by letter case or by surrounding spaces
        if x.str.lower().nunique() != x.nunique():
            new_line()
            print(f"Case issue\n\tin orignal variable There are {x.nunique()} unique values\n\tin lower verstion there are   {x.str.lower().nunique()} unique values.\n")

        if x.str.strip().nunique() != x.nunique():
            new_line()
            print(f"Space issue\n\tin orignal variable There are {x.nunique()} unique values\n\tin striped verstion there are {x.str.strip().nunique()} unique values.")

        plot_catagorical_columns(column_name)

    # `type_` is the label from DTYPES(); comparing the builtin `type` here
    # would make this branch unreachable.
    elif type_ == "Date":

        new_line()
        rd = relativedelta.relativedelta( pd.to_datetime(x.max()), pd.to_datetime(x.min()))
        print(f"Diffrenece between first and last date:\n\tYears : {rd.years}\n\tMonths: {rd.months}\n\tDays  : {rd.days}")

        ff = x.count(), x.nunique(), (x == x.mode().values[0]).sum(), (x == x.value_counts().tail(1).index[0]).sum(), x.value_counts().where(lambda x:x==1).dropna().size
        f = pd.DataFrame(ff, index=["Count", 'Nunique', 'Most frequent values', 'Least frequent values', 'Values occured only once count'], columns=['Counts'])
        f['Ratio'] = (f.Counts / x.count() * 100).round(4)

        new_line()
        print(f"\n{f.to_string()}")


        # Gaps between the smallest and largest observed year / month / day.
        # The sets are sorted so the output really is "in order".
        f = set(np.arange(x.dt.year.min(),x.dt.year.max()+1)).difference(
            x.dt.year.unique())
        if f:
            new_line()
            print(f"These Years (in order) are missing:\n")
            for i in sorted(f):
                print("\t", i, end=", ")

        f = set(np.arange(x.dt.month.min(),x.dt.month.max()+1)).difference(
            x.dt.month.unique())
        if f:
            new_line()
            print(f"These Months (in order) are missing:\n")
            for i in sorted(f):
                print("\t", i, end=", ")

        f = set(np.arange(x.dt.day.min(),x.dt.day.max()+1)).difference(
            x.dt.day.unique())
        if f:
            new_line()
            print(f"These Days (in order) are missing:\n")
            for i in sorted(f):
                print("\t", i, end=", ")

        new_line()
        plot_date_columns(column_name)

# ================================================================================================================ Modeling
print("\n\n")
print("----------------------------------------------------------------------------------------------")
print("****************************************** Modeling ******************************************")

# Regression problem: any numerical (non-boolean) target, whatever its width
# or platform-specific integer size.
if pd.api.types.is_numeric_dtype(df[target_variable]) and not pd.api.types.is_bool_dtype(df[target_variable]):

    print("\n-------------------- This is Regression problem --------------------\n")
    print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

    # Numerical columns as they are, every other column one-hot encoded
    # (dummy columns are named <column>__<category>).
    cat_cols = pd.get_dummies(df.select_dtypes(exclude="number"), prefix_sep="__")
    df = pd.concat([df.select_dtypes("number"), cat_cols], axis=1)
    del cat_cols
    # ====
    train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
    # ====
    # --------------------------------------------------------- Linear regression
    print("\n ------------------------------------- Linear Regression -------------------------------------\n")
    model_reg = OLS(train_y, train_X).fit()
    # Turn the coefficient table of the statsmodels summary into a DataFrame.
    summary = model_reg.summary()
    summary_df = pd.DataFrame(summary.tables[1])
    summary_df.columns = summary_df.iloc[0]
    summary_df.drop(0, inplace=True)
    summary_df.columns = summary_df.columns.astype(str)
    summary_df.columns = ["Variable"] + summary_df.columns[1:].to_list()
    for i in summary_df.columns[1:]:
        summary_df[i] = summary_df[i].astype(str).astype(float)
    summary_df.Variable = summary_df.Variable.astype(str)
    # significance stars derived from the p-value
    summary_df['Indicator'] = summary_df['P>|t|'].apply(lambda x:"***" if x < 0.001 else "**" if x < 0.01 else "*" if x < 0.05 else "." if x < 0.1  else "")
    summary_df = summary_df.sort_values("Variable").reset_index(drop=True)
    # Without a path to_csv() only returns a string; write the file that the
    # message below announces.
    summary_df.to_csv("summary_OLS_1.csv")
    new_line()
    print("NOTE: This summary saved as <summary_OLS_1.csv>")
    new_line()
    print(summary_df.to_string())
    # ============================= Model statistic
    predictions = model_reg.predict(test_X)

    new_line()
    print(" --- Model statistic --- \n")
    print(f"R-squared         : {round(model_reg.rsquared, 3)}")
    print(f"Adj. R-squared    : {round(model_reg.rsquared_adj, 3)}")
    print(f"F-statistic       : {round(model_reg.fvalue)}")
    print(f"Prob (F-statistic): {model_reg.f_pvalue}")
    print(f"No. Observations  : {round(model_reg.nobs)}")
    print(f"AIC               : {round(model_reg.aic)}")
    print(f"Df Residuals      : {round(model_reg.df_resid)}")
    print(f"BIC               : {round(model_reg.bic)}")
    print(f"RMSE (test)       : {RMSE(predictions)}")
    # ======
    # Residuals should not be correlated with any feature; report the strongest one.
    f = train_X.copy("deep")
    f['Errors__'] = model_reg.resid
    f = f.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {f.values[0]}, with columns <{f.index[0]}>")
    print(f"Mean of train reseduals: {model_reg.resid.mean()}")
    del f
    # ============================= END (Model statistic)
    # --------------------------------------------------------- END Linear regression




    # --------------------------------------------------------- Random Forest
    print("\n ------------------------------------- Random Forest -------------------------------------\n")

    rf = RandomForestRegressor(n_estimators = 200, oob_score=True)
    model_rf = rf.fit(train_X, train_y)
    predictions_rf = rf.predict(test_X)

    new_line()
    print(f"RF model peramters:\n")
    pprint.pprint(model_rf.get_params())

    new_line()
    importances = list(rf.feature_importances_)
    feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(test_X, importances)]
    # plot at most the 30 most important features
    featuresImportance = pd.Series(model_rf.feature_importances_, index=train_X.columns).sort_values(ascending=False)
    if len(featuresImportance) > 30:
        featuresImportance = featuresImportance.head(30)
    featuresImportance.plot(figsize=(20,10), kind='bar', grid=True)
    plt.title("RandomForest Feature importances Graph", size=18,color='red')
    plt.xlabel("Features", size=14, color='red')
    plt.ylabel("Importance", size=14, color='red')
    plt.show()
    del featuresImportance

    new_line()
    print("--- Model statistic ---")
    # The coefficient of determination R^2 of the prediction.
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
    print(f"R^2 (test) : {rf.score(test_X, test_y)}")
    print(f"R^2 (train): {rf.score(train_X, train_y)}")
    print(f"RMSE (test): {RMSE(predictions_rf)}")
    print(f"oob score  : {model_rf.oob_score_}")

    f = test_X.copy("deep")
    errors_rf = predictions_rf - test_y
    f['Errors__'] = errors_rf
    f = f.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {f.values[0]}, with columns <{f.index[0]}>")
    # --------------------------------------------------------- END Random Forest
elif df[target_variable].dtype == "O":
    # Classififcation problem
    if df[target_variable].nunique() == 2:
        print("\n-------------------- This is Binary Classification problem --------------------\n")
        print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")
        df = pd.concat([
                        df.select_dtypes(exclude = "O"),
                        pd.get_dummies(df.drop(columns=target_variable).select_dtypes("O")),
                        df[[target_variable]]
                        ], 1)

        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
        clf = LogisticRegression().fit(train_X, train_y)
        predictions = clf.predict_proba(test_X)
        predictions = pd.Series(predictions[:, 0])
        lst = []
        for thresh in np.linspace(predictions.min(), predictions.max(), 50)[1:]:
            pred = predictions < thresh

            pred.loc[pred == True] = clf.classes_[0]
            pred.loc[pred == False] = clf.classes_[1]

            test_y = test_y.reset_index(drop=True)

            TN = ((pred == clf.classes_[0]) & (test_y == clf.classes_[0])).sum()
            TP = ((pred == clf.classes_[1]) & (test_y == clf.classes_[1])).sum()
            FN = ((pred == clf.classes_[0]) & (test_y == clf.classes_[1])).sum()
            FP = ((pred == clf.classes_[1]) & (test_y == clf.classes_[0])).sum()

            p = TP / (TP + FP)
            r = TP / (TP + FN)
            f = 2 * ((p * r) / (p+r))

            lst.append((thresh, (pred == test_y).mean(), p, r , f))

        d = pd.DataFrame(lst, columns=["Thresold", "Accurecy(0-1)", "Precision", "Recall", "F1"])
        d = d.set_index("Thresold")
        d.plot(grid=True, figsize=(18,7));
        plt.title("Model performance at diffrent Thresolds", size=18, color='red');
        plt.xlabel("Thresold", size=14, color='red');
        plt.ylabel("");
        plt.show()
    else:
        print("\n-------------------- This is Multiclass Classification problem --------------------\n")
        print("'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

        df.loc[:, df.select_dtypes("O").columns] = df.select_dtypes("O").apply(lambda x: pd.Series(LabelEncoder().fit_transform(x.astype(str))).astype(str))
        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])

        clf=RandomForestClassifier(n_estimators=1000).fit(train_X, train_y)
        predictions = clf.predict(test_X)
        feature_imp = pd.Series(clf.feature_importances_,index=train_X.columns).sort_values(ascending=False)
        if feature_imp.size > 30:
            feature_imp = feature_imp.head(30)
        feature_imp.plot(kind='barh', figsize=(17,10), grid=True);
        plt.title("Feature importances Graph", size=18, color='red');
        plt.xlabel("Importance", size=14, color='red');
        plt.ylabel("Feature", size=14, color='red');
        plt.show()
        # ====
        f = (test_y, predictions)
        f_int = (test_y.astype(int), predictions.astype(int))

        print(f"accuracy_score: {metrics.accuracy_score(*f)}")
        print(f"f1_score: {metrics.f1_score(*f_int)}")

        metrics.plot_roc_curve(clf, test_X, test_y);
        plt.title("ROC curve plot");
        plt.show();

        metrics.ConfusionMatrixDisplay(metrics.confusion_matrix(*f)); plt.show()

        metrics.plot_confusion_matrix(clf, test_X, test_y);
        plt.title("Confusion matrix");
        plt.show()

        metrics.plot_precision_recall_curve(clf, test_X, test_y);
        plt.title("Precision recall curve");
        plt.show()
# ================================================================================================================ END Modeling
-------------------------

There are 1459 NAs in target values, we droped those rows

-------------------------

The Data have:
	1460 rows
	73 columns


-------------------------

Columns types distribution:

object     39
int64      23
float64    11
dtype: int64

-------------------------

Now There is no NaN value in our Data

-------------------------

There are 1 variables That have only one unique value, so we drop those.

Now The Data have:
	1460 rows
	72 columns


Those columns names in order:

train_or_test

-------------------------

There are 1 column/s that have all unique values, so no value repeatation, we droped those columns.

Now The Data have:
	1460 rows
	71 columns

Those column/s name/s are:

	 Id

-------------------------

Added 15 String Features (Extracted from numerical variables)

-------------------------

<Rare case> catagory:
                  Count    Ratio
HouseStyle          8.0   0.5479
FullBath_str        9.0   0.6164
Foundation          9.0   0.6164
RoofStyle           9.0   0.6164
Neighborhood       11.0   0.7534
BedroomAbvGr_str   14.0   0.9589
Condition1         15.0   1.0274
GarageType         15.0   1.0274
Exterior2nd        17.0   1.1644
GarageCond         18.0   1.2329
SaleType           28.0   1.9178
MiscVal_str        41.0   2.8082
GarageYrBlt       160.0  10.9589

-------------------------



========================================= Alley =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1369   93.7671
Least frequent                41    2.8082
Values occured only once       0    0.0000


========================================= BldgType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1220   83.5616
Least frequent                31    2.1233
Values occured only once       0    0.0000


========================================= BsmtCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1313   89.9315
Least frequent                37    2.5342
Values occured only once       0    0.0000


========================================= BsmtExposure =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                953   65.2740
Least frequent                38    2.6027
Values occured only once       0    0.0000


========================================= BsmtFinType1 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent                430   29.4521
Least frequent                37    2.5342
Values occured only once       0    0.0000


========================================= BsmtFinType2 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1256   86.0274
Least frequent                14    0.9589
Values occured only once       0    0.0000


========================================= BsmtQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                649   44.4521
Least frequent                35    2.3973
Values occured only once       0    0.0000


========================================= CentralAir =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1365   93.4932
Least frequent                95    6.5068
Values occured only once       0    0.0000


========================================= Condition1 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1260   86.3014
Least frequent                11    0.7534
Values occured only once       0    0.0000


========================================= Electrical =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1339   91.7123
Least frequent                27    1.8493
Values occured only once       0    0.0000


========================================= ExterCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1286   88.0822
Least frequent                28    1.9178
Values occured only once       0    0.0000


========================================= ExterQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                906   62.0548
Least frequent                14    0.9589
Values occured only once       0    0.0000


========================================= Exterior1st =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       10    0.6849
Len                         1460       NaN
NA                             0    0.0000
Most frequent                522   35.7534
Least frequent                20    1.3699
Values occured only once       0    0.0000


========================================= Exterior2nd =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       12    0.8219
Len                         1460       NaN
NA                             0    0.0000
Most frequent                504   34.5205
Least frequent                10    0.6849
Values occured only once       0    0.0000


========================================= Fence =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1179   80.7534
Least frequent                11    0.7534
Values occured only once       0    0.0000


========================================= FireplaceQu =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                690   47.2603
Least frequent                20    1.3699
Values occured only once       0    0.0000


========================================= Foundation =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                647   44.3151
Least frequent                 9    0.6164
Values occured only once       0    0.0000


========================================= Functional =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1366   93.5616
Least frequent                14    0.9589
Values occured only once       0    0.0000


========================================= GarageCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1326   90.8219
Least frequent                18    1.2329
Values occured only once       0    0.0000


========================================= GarageFinish =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                605   41.4384
Least frequent                81    5.5479
Values occured only once       0    0.0000


========================================= GarageQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1317   90.2055
Least frequent                14    0.9589
Values occured only once       0    0.0000


========================================= GarageType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                870   59.5890
Least frequent                15    1.0274
Values occured only once       0    0.0000


========================================= GarageYrBlt =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       59    4.0411
Len                         1460       NaN
NA                             0    0.0000
Most frequent                160   10.9589
Least frequent                10    0.6849
Values occured only once       0    0.0000


========================================= HeatingQC =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                742   50.8219
Least frequent                49    3.3562
Values occured only once       0    0.0000


========================================= HouseStyle =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        8    0.5479
Len                         1460       NaN
NA                             0    0.0000
Most frequent                726   49.7260
Least frequent                 8    0.5479
Values occured only once       0    0.0000


========================================= KitchenQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                735   50.3425
Least frequent                39    2.6712
Values occured only once       0    0.0000


========================================= LandContour =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1311   89.7945
Least frequent                36    2.4658
Values occured only once       0    0.0000


========================================= LandSlope =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1382   94.6575
Least frequent                13    0.8904
Values occured only once       0    0.0000


========================================= LotConfig =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1056   72.3288
Least frequent                47    3.2192
Values occured only once       0    0.0000


========================================= LotShape =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                925   63.3562
Least frequent                10    0.6849
Values occured only once       0    0.0000


========================================= MSZoning =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1151   78.8356
Least frequent                10    0.6849
Values occured only once       0    0.0000


========================================= MasVnrType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                872   59.7260
Least frequent                15    1.0274
Values occured only once       0    0.0000


========================================= MiscFeature =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1411   96.6438
Least frequent                49    3.3562
Values occured only once       0    0.0000


========================================= Neighborhood =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       24    1.6438
Len                         1460       NaN
NA                             0    0.0000
Most frequent                225   15.4110
Least frequent                11    0.7534
Values occured only once       0    0.0000


========================================= PavedDrive =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1340   91.7808
Least frequent                30    2.0548
Values occured only once       0    0.0000


========================================= RoofStyle =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1141   78.1507
Least frequent                 9    0.6164
Values occured only once       0    0.0000


========================================= SaleCondition =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1202   82.3288
Least frequent                12    0.8219
Values occured only once       0    0.0000


========================================= SaleType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1267   86.7808
Least frequent                28    1.9178
Values occured only once       0    0.0000


========================================= BedroomAbvGr_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                804   55.0685
Least frequent                14    0.9589
Values occured only once       0    0.0000


========================================= BsmtFullBath_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                857   58.6986
Least frequent                15    1.0274
Values occured only once       0    0.0000


========================================= BsmtHalfBath_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1380   94.5205
Least frequent                80    5.4795
Values occured only once       0    0.0000


========================================= Fireplaces_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                695   47.6027
Least frequent               115    7.8767
Values occured only once       0    0.0000


========================================= FullBath_str =========================================



-------------------------

This column is a duplicate of the <FullBath> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                768   52.6027
Least frequent                 9    0.6164
Values occured only once       0    0.0000


========================================= GarageCars_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                829   56.7808
Least frequent                81    5.5479
Values occured only once       0    0.0000


========================================= HalfBath_str =========================================



-------------------------

This column is a duplicate of the <HalfBath> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                913   62.5342
Least frequent                12    0.8219
Values occured only once       0    0.0000


========================================= KitchenAbvGr_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1395   95.5479
Least frequent                65    4.4521
Values occured only once       0    0.0000


========================================= MSSubClass_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       14    0.9589
Len                         1460       NaN
NA                             0    0.0000
Most frequent                540   36.9863
Least frequent                10    0.6849
Values occured only once       0    0.0000


========================================= MiscVal_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1408   96.4384
Least frequent                11    0.7534
Values occured only once       0    0.0000


========================================= MoSold_str =========================================



-------------------------

This column is a duplicate of the <MoSold> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       12    0.8219
Len                         1460       NaN
NA                             0    0.0000
Most frequent                253   17.3288
Least frequent                52    3.5616
Values occured only once       0    0.0000


========================================= OverallCond_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent                827   56.6438
Least frequent                22    1.5068
Values occured only once       0    0.0000


========================================= OverallQual_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        8    0.5479
Len                         1460       NaN
NA                             0    0.0000
Most frequent                402   27.5342
Least frequent                18    1.2329
Values occured only once       0    0.0000


========================================= TotRmsAbvGrd_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       10    0.6849
Len                         1460       NaN
NA                             0    0.0000
Most frequent                404   27.6712
Least frequent                11    0.7534
Values occured only once       0    0.0000


========================================= YrSold_str =========================================



-------------------------

This column is a duplicate of the <YrSold> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                338   23.1507
Least frequent               175   11.9863
Values occured only once       0    0.0000


========================================= 1stFlrSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.3767566220336365)
Note: When the absolute skewness exceeds 1, we call the data highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1162.63     NaN
Std        386.59     NaN
Min        334.00     NaN
25%        882.00     NaN
50%       1087.00     NaN
75%       1391.25     NaN
Max       4692.00     NaN
Nunique    753.00   51.58
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros        0.00    0.00


========================================= 2ndFlrSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.8130298163023265)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       346.99     NaN
Std        436.53     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        728.00     NaN
Max       2065.00     NaN
Nunique    417.00   28.56
Outlies      4.00    0.27
Nagetive     0.00    0.00
Zeros      829.00   56.78


========================================= BedroomAbvGr =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.21179009627507137)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         2.87     NaN
Std          0.82     NaN
Min          0.00     NaN
25%          2.00     NaN
50%          3.00     NaN
75%          3.00     NaN
Max          8.00     NaN
Nunique      8.00    0.55
Outlies     14.00    0.96
Nagetive     0.00    0.00
Zeros        6.00    0.41


========================================= BsmtFinSF1 =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.685503071910789)
Note: When the absolute skewness exceeds 1, we call the data highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       443.64     NaN
Std        456.10     NaN
Min          0.00     NaN
25%          0.00     NaN
50%        383.50     NaN
75%        712.25     NaN
Max       5644.00     NaN
Nunique    637.00   43.63
Outlies      6.00    0.41
Nagetive     0.00    0.00
Zeros      467.00   31.99


========================================= BsmtFinSF2 =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.255261108933303)
Note: When the absolute skewness exceeds 1, we call the data highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        46.55     NaN
Std        161.32     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max       1474.00     NaN
Nunique    144.00    9.86
Outlies     50.00    3.42
Nagetive     0.00    0.00
Zeros     1293.00   88.56


========================================= BsmtFullBath =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.596066609663168)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.43     NaN
Std          0.52     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies     16.00    1.10
Nagetive     0.00    0.00
Zeros      856.00   58.63


========================================= BsmtHalfBath =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.103402697955168)
Note: When the absolute skewness exceeds 1, we call the data highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.06     NaN
Std          0.24     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max          2.00     NaN
Nunique      3.00    0.21
Outlies     82.00    5.62
Nagetive     0.00    0.00
Zeros     1378.00   94.38


========================================= BsmtUnfSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.9202684528039037)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       567.24     NaN
Std        441.87     NaN
Min          0.00     NaN
25%        223.00     NaN
50%        477.50     NaN
75%        808.00     NaN
Max       2336.00     NaN
Nunique    780.00   53.42
Outlies     11.00    0.75
Nagetive     0.00    0.00
Zeros      118.00    8.08


========================================= EnclosedPorch =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 3.08987190371177)
Note: When the absolute skewness exceeds 1, we call the data highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        21.95     NaN
Std         61.12     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        552.00     NaN
Nunique    120.00    8.22
Outlies     51.00    3.49
Nagetive     0.00    0.00
Zeros     1252.00   85.75


========================================= Fireplaces =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6495651830548841)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.61     NaN
Std          0.64     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          1.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies      5.00    0.34
Nagetive     0.00    0.00
Zeros      690.00   47.26


========================================= FullBath =========================================



-------------------------

This Columns is duplicate of <FullBath_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.036561558402727165)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.57     NaN
Std          0.55     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          2.00     NaN
75%          2.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        9.00    0.62


========================================= GarageArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.17998090674623907)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       472.98     NaN
Std        213.80     NaN
Min          0.00     NaN
25%        334.50     NaN
50%        480.00     NaN
75%        576.00     NaN
Max       1418.00     NaN
Nunique    441.00   30.21
Outlies      7.00    0.48
Nagetive     0.00    0.00
Zeros       81.00    5.55


========================================= GarageCars =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: -0.3425489297486655)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.77     NaN
Std          0.75     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          2.00     NaN
75%          2.00     NaN
Max          4.00     NaN
Nunique      5.00    0.34
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros       81.00    5.55


========================================= GrLivArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.3665603560164552)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1515.46     NaN
Std        525.48     NaN
Min        334.00     NaN
25%       1129.50     NaN
50%       1464.00     NaN
75%       1776.75     NaN
Max       5642.00     NaN
Nunique    861.00   58.97
Outlies     16.00    1.10
Nagetive     0.00    0.00
Zeros        0.00    0.00


========================================= HalfBath =========================================



-------------------------

This Columns is duplicate of <HalfBath_str> column
Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.675897448233722)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.38     NaN
Std          0.50     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          1.00     NaN
Max          2.00     NaN
Nunique      3.00    0.21
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros      913.00   62.53


========================================= KitchenAbvGr =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.488396777072859)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.05     NaN
Std          0.22     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          1.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies     68.00    4.66
Nagetive     0.00    0.00
Zeros        1.00    0.07


========================================= LotArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 12.207687851233496)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

              Count   Ratio
Count       1460.00  100.00
NA             0.00    0.00
Mean       10516.83     NaN
Std         9981.26     NaN
Min         1300.00     NaN
25%         7553.50     NaN
50%         9478.50     NaN
75%        11601.50     NaN
Max       215245.00     NaN
Nunique     1073.00   73.49
Outlies       13.00    0.89
Nagetive       0.00    0.00
Zeros          0.00    0.00


========================================= LotFrontage =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.402352471321692)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count  Ratio
Count     1460.00  100.0
NA           0.00    0.0
Mean        69.92    NaN
Std         22.03    NaN
Min         21.00    NaN
25%         60.00    NaN
50%         69.31    NaN
75%         79.00    NaN
Max        313.00    NaN
Nunique    111.00    7.6
Outlies     16.00    1.1
Nagetive     0.00    0.0
Zeros        0.00    0.0


========================================= MSSubClass =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.4076567471495591)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

           Count   Ratio
Count     1460.0  100.00
NA           0.0    0.00
Mean        56.9     NaN
Std         42.3     NaN
Min         20.0     NaN
25%         20.0     NaN
50%         50.0     NaN
75%         70.0     NaN
Max        190.0     NaN
Nunique     15.0    1.03
Outlies     30.0    2.05
Nagetive     0.0    0.00
Zeros        0.0    0.00


========================================= MasVnrArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.676545581771927)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       103.68     NaN
Std        180.57     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        164.25     NaN
Max       1600.00     NaN
Nunique    328.00   22.47
Outlies     32.00    2.19
Nagetive     0.00    0.00
Zeros      861.00   58.97


========================================= MiscVal =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 24.476794188821916)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

             Count   Ratio
Count      1460.00  100.00
NA            0.00    0.00
Mean         43.49     NaN
Std         496.12     NaN
Min           0.00     NaN
25%           0.00     NaN
50%           0.00     NaN
75%           0.00     NaN
Max       15500.00     NaN
Nunique      21.00    1.44
Outlies       8.00    0.55
Nagetive      0.00    0.00
Zeros      1408.00   96.44


========================================= MoSold =========================================



-------------------------

This Columns is duplicate of <MoSold_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.21205298505146022)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.32     NaN
Std          2.70     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          8.00     NaN
Max         12.00     NaN
Nunique     12.00    0.82
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00


========================================= OpenPorchSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.3643417403694404)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        46.66     NaN
Std         66.26     NaN
Min          0.00     NaN
25%          0.00     NaN
50%         25.00     NaN
75%         68.00     NaN
Max        547.00     NaN
Nunique    202.00   13.84
Outlies     27.00    1.85
Nagetive     0.00    0.00
Zeros      656.00   44.93


========================================= OverallCond =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6930674724842182)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         5.58     NaN
Std          1.11     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          5.00     NaN
75%          6.00     NaN
Max          9.00     NaN
Nunique      9.00    0.62
Outlies     28.00    1.92
Nagetive     0.00    0.00
Zeros        0.00    0.00


========================================= OverallQual =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.2169439277628693)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.10     NaN
Std          1.38     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          7.00     NaN
Max         10.00     NaN
Nunique     10.00    0.68
Outlies      2.00    0.14
Nagetive     0.00    0.00
Zeros        0.00    0.00


========================================= SalePrice =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.8828757597682129)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

             Count   Ratio
Count       1460.0  100.00
NA             0.0    0.00
Mean      180921.2     NaN
Std        79442.5     NaN
Min        34900.0     NaN
25%       129975.0     NaN
50%       163000.0     NaN
75%       214000.0     NaN
Max       755000.0     NaN
Nunique      663.0   45.41
Outlies       22.0    1.51
Nagetive       0.0    0.00
Zeros          0.0    0.00


========================================= ScreenPorch =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.122213743143115)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        15.06     NaN
Std         55.76     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        480.00     NaN
Nunique     76.00    5.21
Outlies     55.00    3.77
Nagetive     0.00    0.00
Zeros     1344.00   92.05


========================================= TotRmsAbvGrd =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6763408364355531)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.52     NaN
Std          1.63     NaN
Min          2.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          7.00     NaN
Max         14.00     NaN
Nunique     12.00    0.82
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros        0.00    0.00


========================================= TotalBsmtSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.5242545490627664)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1057.43     NaN
Std        438.71     NaN
Min          0.00     NaN
25%        795.75     NaN
50%        991.50     NaN
75%       1298.25     NaN
Max       6110.00     NaN
Nunique    721.00   49.38
Outlies     10.00    0.68
Nagetive     0.00    0.00
Zeros       37.00    2.53


========================================= WoodDeckSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.5413757571931312)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        94.24     NaN
Std        125.34     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        168.00     NaN
Max        857.00     NaN
Nunique    274.00   18.77
Outlies     22.00    1.51
Nagetive     0.00    0.00
Zeros      761.00   52.12


========================================= YearBuilt =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.613461172488183)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1971.27     NaN
Std         30.20     NaN
Min       1872.00     NaN
25%       1954.00     NaN
50%       1973.00     NaN
75%       2000.00     NaN
Max       2010.00     NaN
Nunique    112.00    7.67
Outlies      6.00    0.41
Nagetive     0.00    0.00
Zeros        0.00    0.00


========================================= YearRemodAdd =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.5035620027004709)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1984.87     NaN
Std         20.65     NaN
Min       1950.00     NaN
25%       1967.00     NaN
50%       1994.00     NaN
75%       2004.00     NaN
Max       2010.00     NaN
Nunique     61.00    4.18
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00


========================================= YrSold =========================================



-------------------------

This Columns is duplicate of <YrSold_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.09626851386568028)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      2007.82     NaN
Std          1.33     NaN
Min       2006.00     NaN
25%       2007.00     NaN
50%       2008.00     NaN
75%       2009.00     NaN
Max       2010.00     NaN
Nunique      5.00    0.34
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00


----------------------------------------------------------------------------------------------
****************************************** Modeling ******************************************

-------------------- This is Regression problem --------------------

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

 ------------------------------------- Linear Regression -------------------------------------


-------------------------

NOTE: This summary saved as <summary_OLS_1.csv>

-------------------------

                         Variable        coef    std err      t  P>|t|      [0.025      0.975] Indicator
0                        1stFlrSF    -18.9291     31.979 -0.592  0.554     -81.703      43.845          
1                        2ndFlrSF     -0.7634     30.868 -0.025  0.980     -61.357      59.830          
2                     Alley__Grvl   2605.5787   4932.448  0.528  0.597   -7076.714   12300.000          
3          Alley__No alley access    137.6717   3489.529  0.039  0.969   -6712.200    6987.544          
4                     Alley__Pave  -2743.1919   5317.879 -0.516  0.606  -13200.000    7695.693          
5                    BedroomAbvGr  -3597.4928   4643.740 -0.775  0.439  -12700.000    5518.071          
6           BedroomAbvGr_str__"1"   -747.0720  11300.000 -0.066  0.947  -22900.000   21400.000          
7           BedroomAbvGr_str__"2"    729.8443   6427.864  0.114  0.910  -11900.000   13300.000          
8           BedroomAbvGr_str__"3"   -450.6655   3615.612 -0.125  0.901   -7548.037    6646.706          
9           BedroomAbvGr_str__"4"   7809.2207   5142.021  1.519  0.129   -2284.459   17900.000          
10          BedroomAbvGr_str__"5"  -9867.5121  11300.000 -0.875  0.382  -32000.000   12300.000          
11   BedroomAbvGr_str__Rare cases   2526.2431  11100.000  0.228  0.820  -19200.000   24300.000          
12                 BldgType__1Fam   2640.0462  16800.000  0.157  0.875  -30400.000   35700.000          
13               BldgType__2fmCon   9404.5974  40500.000  0.232  0.816  -70000.000   88900.000          
14               BldgType__Duplex  -9000.5928   7315.770 -1.230  0.219  -23400.000    5360.111          
15                BldgType__Twnhs  -1765.9982  16200.000 -0.109  0.913  -33600.000   30100.000          
16               BldgType__TwnhsE  -1277.9941  15400.000 -0.083  0.934  -31400.000   28900.000          
17                   BsmtCond__Fa  -2291.7883   5112.848 -0.448  0.654  -12300.000    7744.626          
18                   BsmtCond__Gd   1815.1592   4647.127  0.391  0.696   -7307.053   10900.000          
19          BsmtCond__No Basement  -1146.3490   6853.290 -0.167  0.867  -14600.000   12300.000          
20                   BsmtCond__TA   1623.0366   3442.676  0.471  0.637   -5134.864    8380.937          
21               BsmtExposure__Av   -955.3657   6265.186 -0.152  0.879  -13300.000   11300.000          
22               BsmtExposure__Gd  19180.0000   6699.586  2.863  0.004    6030.572   32300.000        **
23               BsmtExposure__Mn  -2522.3406   6537.230 -0.386  0.700  -15400.000   10300.000          
24               BsmtExposure__No  -7207.7651   6050.742 -1.191  0.234  -19100.000    4669.714          
25      BsmtExposure__No Basement  -8496.1886  23300.000 -0.365  0.715  -54200.000   37200.000          
26                     BsmtFinSF1     -2.1508      4.058 -0.530  0.596     -10.116       5.814          
27                     BsmtFinSF2      9.5969      8.770  1.094  0.274      -7.619      26.813          
28              BsmtFinType1__ALQ   4102.1275   2929.984  1.400  0.162   -1649.371    9853.626          
29              BsmtFinType1__BLQ    331.6853   3209.358  0.103  0.918   -5968.217    6631.588          
30              BsmtFinType1__GLQ   3969.4932   3101.691  1.280  0.201   -2119.062   10100.000          
31              BsmtFinType1__LwQ  -2964.4002   4409.169 -0.672  0.502  -11600.000    5690.705          
32      BsmtFinType1__No Basement  -1146.3490   6853.290 -0.167  0.867  -14600.000   12300.000          
33              BsmtFinType1__Rec   1337.3169   3440.467  0.389  0.698   -5416.249    8090.882          
34              BsmtFinType1__Unf  -5629.8152   3118.153 -1.805  0.071  -11800.000     491.054         .
35              BsmtFinType2__ALQ  -9194.9585   8364.803 -1.099  0.272  -25600.000    7224.974          
36              BsmtFinType2__BLQ   2206.1554   6212.058  0.355  0.723   -9987.985   14400.000          
37              BsmtFinType2__GLQ  -3490.8332   9335.643 -0.374  0.709  -21800.000   14800.000          
38              BsmtFinType2__LwQ   1429.4090   5496.851  0.260  0.795   -9360.794   12200.000          
39      BsmtFinType2__No Basement  -1146.3490   6853.290 -0.167  0.867  -14600.000   12300.000          
40              BsmtFinType2__Rec   1424.8590   5220.650  0.273  0.785   -8823.168   11700.000          
41              BsmtFinType2__Unf   8771.7758   5249.636  1.671  0.095   -1533.149   19100.000         .
42                   BsmtFullBath   3373.6210  11900.000  0.282  0.778  -20100.000   26800.000          
43        BsmtFullBath_str__"0.0"   9008.7075  12600.000  0.712  0.477  -15800.000   33800.000          
44        BsmtFullBath_str__"1.0"  12040.0000   4748.957  2.535  0.011    2715.731   21400.000         *
45        BsmtFullBath_str__"2.0" -21050.0000  14800.000 -1.418  0.157  -50200.000    8089.782          
46                   BsmtHalfBath    966.2138   1596.820  0.605  0.545   -2168.311    4100.739          
47        BsmtHalfBath_str__"0.0"   -966.1553   1597.006 -0.605  0.545   -4101.044    2168.733          
48        BsmtHalfBath_str__"1.0"    966.2138   1596.820  0.605  0.545   -2168.311    4100.739          
49                   BsmtQual__Ex  10400.0000   4854.970  2.142  0.032     870.463   19900.000         *
50                   BsmtQual__Fa  -5840.2387   6853.000 -0.852  0.394  -19300.000    7612.056          
51                   BsmtQual__Gd   -847.1803   3500.227 -0.242  0.809   -7718.053    6023.692          
52          BsmtQual__No Basement  -1146.3490   6853.290 -0.167  0.867  -14600.000   12300.000          
53                   BsmtQual__TA  -2566.8404   3499.546 -0.733  0.463   -9436.375    4302.694          
54                      BsmtUnfSF     -6.3844      4.070 -1.569  0.117     -14.373       1.605          
55                  CentralAir__N   -846.8181   3020.771 -0.280  0.779   -6776.527    5082.891          
56                  CentralAir__Y    846.8767   3020.765  0.280  0.779   -5082.821    6776.574          
57             Condition1__Artery   3732.8700   6192.364  0.603  0.547   -8422.611   15900.000          
58              Condition1__Feedr   -496.4521   5065.223 -0.098  0.922  -10400.000    9446.474          
59               Condition1__Norm  10920.0000   3472.399  3.145  0.002    4105.120   17700.000        **
60               Condition1__PosN   -563.8500   7897.333 -0.071  0.943  -16100.000   14900.000          
61               Condition1__RRAe -18820.0000  13200.000 -1.431  0.153  -44600.000    7000.931          
62               Condition1__RRAn   7528.7156   7083.730  1.063  0.288   -6376.497   21400.000          
63         Condition1__Rare cases  -2303.4221   9221.158 -0.250  0.803  -20400.000   15800.000          
64              Electrical__FuseA   3211.1933   3976.491  0.808  0.420   -4594.575   11000.000          
65              Electrical__FuseF  -1947.4463   5474.071 -0.356  0.722  -12700.000    8798.041          
66              Electrical__SBrkr  -1263.6885   3296.593 -0.383  0.702   -7734.832    5207.455          
67                  EnclosedPorch      8.3079     19.643  0.423  0.672     -30.251      46.866          
68                  ExterCond__Fa   4465.3177   5747.467  0.777  0.437   -6816.840   15700.000          
69                  ExterCond__Gd  -6896.6053   3721.336 -1.853  0.064  -14200.000     408.299         .
70                  ExterCond__TA   2431.3460   3148.612  0.772  0.440   -3749.313    8612.005          
71                  ExterQual__Ex   -312.4044   7410.732 -0.042  0.966  -14900.000   14200.000          
72                  ExterQual__Fa   8784.3626  13000.000  0.678  0.498  -16600.000   34200.000          
73                  ExterQual__Gd  -1930.8438   5030.099 -0.384  0.701  -11800.000    7943.134          
74                  ExterQual__TA  -6541.0559   4949.599 -1.322  0.187  -16300.000    3174.903          
75           Exterior1st__AsbShng -13900.0000  16100.000 -0.861  0.389  -45600.000   17800.000          
76           Exterior1st__BrkFace  11970.0000   8076.529  1.482  0.139   -3881.228   27800.000          
77           Exterior1st__CemntBd  34380.0000  22800.000  1.510  0.131  -10300.000   79100.000          
78           Exterior1st__HdBoard  -4457.8950   7229.429 -0.617  0.538  -18600.000    9733.323          
79           Exterior1st__MetalSd  -6254.5170  15600.000 -0.400  0.689  -37000.000   24400.000          
80           Exterior1st__Plywood   -336.5469   7381.400 -0.046  0.964  -14800.000   14200.000          
81            Exterior1st__Stucco   -846.7644  14200.000 -0.059  0.953  -28800.000   27100.000          
82           Exterior1st__VinylSd -15190.0000   8383.864 -1.812  0.070  -31600.000    1269.085         .
83           Exterior1st__Wd Sdng  -2063.0215   6229.668 -0.331  0.741  -14300.000   10200.000          
84           Exterior1st__WdShing  -3306.7264   8627.406 -0.383  0.702  -20200.000   13600.000          
85           Exterior2nd__AsbShng   9117.7905  15700.000  0.582  0.561  -21700.000   39900.000          
86           Exterior2nd__BrkFace    811.8004  10900.000  0.074  0.941  -20600.000   22300.000          
87           Exterior2nd__CmentBd -51790.0000  23700.000 -2.186  0.029  -98300.000   -5274.610         *
88           Exterior2nd__HdBoard   2726.4321   7007.409  0.389  0.697  -11000.000   16500.000          
89           Exterior2nd__ImStucc  26090.0000  11800.000  2.220  0.027    3022.361   49200.000         *
90           Exterior2nd__MetalSd   9677.6213  15800.000  0.611  0.542  -21400.000   40800.000          
91           Exterior2nd__Plywood   3312.2103   6478.681  0.511  0.609   -9405.305   16000.000          
92        Exterior2nd__Rare cases   2604.9128   9905.871  0.263  0.793  -16800.000   22000.000          
93            Exterior2nd__Stucco -23580.0000  14800.000 -1.592  0.112  -52700.000    5493.237          
94           Exterior2nd__VinylSd  14500.0000   8237.632  1.760  0.079   -1674.449   30700.000         .
95           Exterior2nd__Wd Sdng   4142.5850   6151.754  0.673  0.501   -7933.179   16200.000          
96           Exterior2nd__Wd Shng   2394.6791   7476.449  0.320  0.749  -12300.000   17100.000          
97                   Fence__GdPrv   2352.7718   4990.257  0.471  0.637   -7442.998   12100.000          
98                    Fence__GdWo   1200.5834   5164.365  0.232  0.816   -8936.957   11300.000          
99                   Fence__MnPrv   3279.3900   4081.948  0.803  0.422   -4733.389   11300.000          
100                   Fence__MnWw  -4941.3647  10900.000 -0.451  0.652  -26400.000   16500.000          
101               Fence__No Fence  -1891.3219   3372.180 -0.561  0.575   -8510.841    4728.197          
102               FireplaceQu__Ex  11880.0000   6631.453  1.792  0.074   -1135.124   24900.000         .
103               FireplaceQu__Fa  -4539.2003   5748.670 -0.790  0.430  -15800.000    6745.319          
104               FireplaceQu__Gd  -2270.0968   3015.670 -0.753  0.452   -8189.793    3649.600          
105     FireplaceQu__No Fireplace  -1538.4713    770.427 -1.997  0.046   -3050.803     -26.139         *
106               FireplaceQu__Po   -634.2478   6632.834 -0.096  0.924  -13700.000   12400.000          
107               FireplaceQu__TA  -2900.2054   3268.343 -0.887  0.375   -9315.893    3515.483          
108                    Fireplaces -17470.0000   5177.698 -3.374  0.001  -27600.000   -7303.447        **
109           Fireplaces_str__"0" -25760.0000   5323.387 -4.839  0.000  -36200.000  -15300.000       ***
110           Fireplaces_str__"1"  -3675.9313   1685.955 -2.180  0.030   -6985.425    -366.437         *
111           Fireplaces_str__"2"  29430.0000   5724.041  5.142  0.000   18200.000   40700.000       ***
112            Foundation__BrkTil  -2600.6615   5740.920 -0.453  0.651  -13900.000    8668.643          
113            Foundation__CBlock   2769.6422   4486.126  0.617  0.537   -6036.530   11600.000          
114             Foundation__PConc   4089.2367   4717.156  0.867  0.386   -5170.441   13300.000          
115        Foundation__Rare cases   1568.3519  10500.000  0.150  0.881  -19000.000   22100.000          
116              Foundation__Slab  -5826.5108  13300.000 -0.437  0.662  -32000.000   20300.000          
117                      FullBath   6058.7534   6120.779  0.990  0.323   -5956.207   18100.000          
118             FullBath_str__"1" -16390.0000   8447.365 -1.941  0.053  -33000.000     188.950         .
119             FullBath_str__"2" -14450.0000   4225.459 -3.420  0.001  -22700.000   -6158.111        **
120             FullBath_str__"3"  17120.0000   4009.263  4.270  0.000    9248.902   25000.000       ***
121      FullBath_str__Rare cases  13730.0000   8408.071  1.633  0.103   -2778.160   30200.000          
122              Functional__Maj1 -16200.0000  10500.000 -1.544  0.123  -36800.000    4393.314          
123              Functional__Min1  -2100.2088   6308.488 -0.333  0.739  -14500.000   10300.000          
124              Functional__Min2   8853.0233   6764.342  1.309  0.191   -4425.237   22100.000          
125               Functional__Mod  -2294.5000   7879.046 -0.291  0.771  -17800.000   13200.000          
126               Functional__Typ  11740.0000   4315.963  2.720  0.007    3266.759   20200.000        **
127                    GarageArea     -0.4762     12.020 -0.040  0.968     -24.071      23.119          
128                    GarageCars  17210.0000   7976.569  2.157  0.031    1547.973   32900.000         *
129         GarageCars_str__"0.0"   2601.8175   2343.661  1.110  0.267   -1998.740    7202.375          
130         GarageCars_str__"1.0"   5055.3689   7573.893  0.667  0.505   -9812.025   19900.000          
131         GarageCars_str__"2.0"  -6389.0263   1983.306 -3.221  0.001  -10300.000   -2495.837        **
132         GarageCars_str__"3.0"  -1268.1016   8968.512 -0.141  0.888  -18900.000   16300.000          
133                GarageCond__Fa  -3618.5777   6222.544 -0.582  0.561  -15800.000    8596.146          
134         GarageCond__No Garage   2601.8175   2343.661  1.110  0.267   -1998.740    7202.375          
135        GarageCond__Rare cases  -3414.7673   6891.991 -0.495  0.620  -16900.000   10100.000          
136                GarageCond__TA   4431.5860   4744.790  0.934  0.351   -4882.336   13700.000          
137             GarageFinish__Fin    939.9693   2090.693  0.450  0.653   -3164.017    5043.955          
138       GarageFinish__No Garage   2601.8175   2343.661  1.110  0.267   -1998.740    7202.375          
139             GarageFinish__RFn   -643.9639   1808.495 -0.356  0.722   -4194.001    2906.073          
140             GarageFinish__Unf  -2897.7644   2154.438 -1.345  0.179   -7126.880    1331.351          
141                GarageQual__Fa  -6870.6135   6183.708 -1.111  0.267  -19000.000    5267.876          
142                GarageQual__Gd   4780.3569   7666.879  0.624  0.533  -10300.000   19800.000          
143         GarageQual__No Garage   2601.8175   2343.661  1.110  0.267   -1998.740    7202.375          
144                GarageQual__TA   -511.5024   4677.222 -0.109  0.913   -9692.792    8669.787          
145            GarageType__Attchd   6575.8057   3232.901  2.034  0.042     229.689   12900.000         *
146           GarageType__Basment   3422.5864   8049.200  0.425  0.671  -12400.000   19200.000          
147           GarageType__BuiltIn  -5692.1896   4878.686 -1.167  0.244  -15300.000    3884.569          
148            GarageType__Detchd   5929.9652   3716.066  1.596  0.111   -1364.594   13200.000          
149         GarageType__No Garage   2601.8175   2343.661  1.110  0.267   -1998.740    7202.375          
150        GarageType__Rare cases -12840.0000   8674.122 -1.480  0.139  -29900.000    4189.194          
151           GarageYrBlt__1920.0   8890.2752  11100.000  0.800  0.424  -12900.000   30700.000          
152           GarageYrBlt__1925.0  -5147.7715  12700.000 -0.405  0.686  -30100.000   19800.000          
153           GarageYrBlt__1940.0    147.7335  10200.000  0.014  0.988  -19900.000   20200.000          
154           GarageYrBlt__1941.0 -13030.0000  11700.000 -1.109  0.268  -36100.000   10000.000          
155           GarageYrBlt__1948.0   6120.5497  10300.000  0.592  0.554  -14200.000   26400.000          
156           GarageYrBlt__1950.0   9453.3317   8616.078  1.097  0.273   -7459.848   26400.000          
157           GarageYrBlt__1953.0  -8170.0970  11200.000 -0.729  0.466  -30200.000   13800.000          
158           GarageYrBlt__1954.0   5616.8723   8383.653  0.670  0.503  -10800.000   22100.000          
159           GarageYrBlt__1955.0   5002.4776  11200.000  0.448  0.654  -16900.000   26900.000          
160           GarageYrBlt__1956.0    502.3539   8533.073  0.059  0.953  -16200.000   17300.000          
161           GarageYrBlt__1957.0 -12370.0000   8778.900 -1.410  0.159  -29600.000    4858.379          
162           GarageYrBlt__1958.0  -4305.6688   8491.188 -0.507  0.612  -21000.000   12400.000          
163           GarageYrBlt__1959.0  -3143.2312  11300.000 -0.277  0.781  -25400.000   19100.000          
164           GarageYrBlt__1960.0   2156.7368   9286.725  0.232  0.816  -16100.000   20400.000          
165           GarageYrBlt__1961.0    821.4058   9523.383  0.086  0.931  -17900.000   19500.000          
166           GarageYrBlt__1962.0   2352.5008   8579.673  0.274  0.784  -14500.000   19200.000          
167           GarageYrBlt__1963.0  -5176.7958  11900.000 -0.435  0.664  -28500.000   18200.000          
168           GarageYrBlt__1964.0   4404.6787   8352.097  0.527  0.598  -12000.000   20800.000          
169           GarageYrBlt__1965.0   4515.2740   7759.576  0.582  0.561  -10700.000   19700.000          
170           GarageYrBlt__1966.0   7713.2053   8452.440  0.913  0.362   -8878.758   24300.000          
171           GarageYrBlt__1967.0  -2796.3429   9749.350 -0.287  0.774  -21900.000   16300.000          
172           GarageYrBlt__1968.0   2161.7574   7329.970  0.295  0.768  -12200.000   16600.000          
173           GarageYrBlt__1969.0  -4376.0380   9844.444 -0.445  0.657  -23700.000   14900.000          
174           GarageYrBlt__1970.0  -6066.7877   8479.347 -0.715  0.475  -22700.000   10600.000          
175           GarageYrBlt__1971.0  -8834.2134  10400.000 -0.848  0.397  -29300.000   11600.000          
176           GarageYrBlt__1972.0   5459.8952  12600.000  0.433  0.665  -19300.000   30200.000          
177           GarageYrBlt__1973.0  -5189.7205  10900.000 -0.474  0.636  -26700.000   16300.000          
178           GarageYrBlt__1974.0   3708.8970   8733.497  0.425  0.671  -13400.000   20900.000          
179           GarageYrBlt__1976.0   -547.9292   7466.268 -0.073  0.942  -15200.000   14100.000          
180           GarageYrBlt__1977.0  -9148.6240   6577.647 -1.391  0.165  -22100.000    3763.159          
181           GarageYrBlt__1978.0  -4003.6845   8986.478 -0.446  0.656  -21600.000   13600.000          
182           GarageYrBlt__1979.0   8813.8773   9689.640  0.910  0.363  -10200.000   27800.000          
183           GarageYrBlt__1980.0  -3602.2659   9586.221 -0.376  0.707  -22400.000   15200.000          
184           GarageYrBlt__1981.0  -3467.5224  12400.000 -0.279  0.780  -27900.000   20900.000          
185           GarageYrBlt__1985.0 -16340.0000  11100.000 -1.476  0.140  -38100.000    5390.701          
186           GarageYrBlt__1987.0   7774.5942  10900.000  0.715  0.475  -13600.000   29100.000          
187           GarageYrBlt__1988.0  -2355.4834   8842.483 -0.266  0.790  -19700.000   15000.000          
188           GarageYrBlt__1989.0  -3939.7325  11600.000 -0.339  0.735  -26700.000   18900.000          
189           GarageYrBlt__1990.0  -2281.7813   9148.622 -0.249  0.803  -20200.000   15700.000          
190           GarageYrBlt__1992.0 -15540.0000   9630.900 -1.613  0.107  -34400.000    3368.385          
191           GarageYrBlt__1993.0  -1556.7949   7988.941 -0.195  0.846  -17200.000   14100.000          
192           GarageYrBlt__1994.0   6794.4503   8460.653  0.803  0.422   -9813.633   23400.000          
193           GarageYrBlt__1995.0  13130.0000   7779.559  1.688  0.092   -2141.221   28400.000         .
194           GarageYrBlt__1996.0  14630.0000   8853.710  1.652  0.099   -2751.196   32000.000         .
195           GarageYrBlt__1997.0   4909.9572   7633.986  0.643  0.520  -10100.000   19900.000          
196           GarageYrBlt__1998.0   1565.3608   7327.271  0.214  0.831  -12800.000   15900.000          
197           GarageYrBlt__1999.0  -6415.8034   7600.197 -0.844  0.399  -21300.000    8503.225          
198           GarageYrBlt__2000.0   3484.3090   7562.350  0.461  0.645  -11400.000   18300.000          
199           GarageYrBlt__2001.0   6898.9150   8026.015  0.860  0.390   -8855.984   22700.000          
200           GarageYrBlt__2002.0  -1586.3749   7392.060 -0.215  0.830  -16100.000   12900.000          
201           GarageYrBlt__2003.0  -4813.0710   6569.778 -0.733  0.464  -17700.000    8083.265          
202           GarageYrBlt__2004.0  -1465.3036   6752.263 -0.217  0.828  -14700.000   11800.000          
203           GarageYrBlt__2005.0  -6917.1792   6128.087 -1.129  0.259  -18900.000    5112.126          
204           GarageYrBlt__2006.0   2816.1685   7154.678  0.394  0.694  -11200.000   16900.000          
205           GarageYrBlt__2007.0 -15590.0000   7336.068 -2.125  0.034  -30000.000   -1187.476         *
206           GarageYrBlt__2008.0   8908.1849   9669.565  0.921  0.357  -10100.000   27900.000          
207           GarageYrBlt__2009.0  32890.0000   9997.430  3.290  0.001   13300.000   52500.000        **
208        GarageYrBlt__No Garage   2601.8175   2343.661  1.110  0.267   -1998.740    7202.375          
209       GarageYrBlt__Rare cases  -6057.8308   3967.706 -1.527  0.127  -13800.000    1730.693          
210                     GrLivArea     58.5267     31.474  1.860  0.063      -3.257     120.310         .
211                      HalfBath   2858.3009   5280.756  0.541  0.588   -7507.713   13200.000          
212             HalfBath_str__"0"  -1193.7418   1131.388 -1.055  0.292   -3414.632    1027.149          
213             HalfBath_str__"1"   -470.7004   5361.759 -0.088  0.930  -11000.000   10100.000          
214             HalfBath_str__"2"   1664.5006   5199.734  0.320  0.749   -8542.469   11900.000          
215                 HeatingQC__Ex   2249.8611   2424.900  0.928  0.354   -2510.167    7009.889          
216                 HeatingQC__Fa   1641.9802   4488.338  0.366  0.715   -7168.534   10500.000          
217                 HeatingQC__Gd  -2790.8347   2501.144 -1.116  0.265   -7700.528    2118.858          
218                 HeatingQC__TA  -1100.9481   2269.879 -0.485  0.628   -5556.673    3354.777          
219            HouseStyle__1.5Fin   -243.1712  11800.000 -0.021  0.984  -23400.000   22900.000          
220            HouseStyle__1.5Unf  20120.0000  22200.000  0.907  0.365  -23400.000   63700.000          
221            HouseStyle__1Story  24520.0000  10400.000  2.368  0.018    4191.716   44800.000         *
222            HouseStyle__2.5Unf -29770.0000  21800.000 -1.368  0.172  -72500.000   12900.000          
223            HouseStyle__2Story  -8629.9625  10400.000 -0.833  0.405  -29000.000   11700.000          
224        HouseStyle__Rare cases -40760.0000  24700.000 -1.650  0.099  -89200.000    7725.341         .
225            HouseStyle__SFoyer  25490.0000  17000.000  1.502  0.133   -7816.221   58800.000          
226              HouseStyle__SLvl   9273.4006  16200.000  0.573  0.567  -22500.000   41000.000          
227                  KitchenAbvGr  -3074.7682  12200.000 -0.253  0.800  -26900.000   20800.000          
228         KitchenAbvGr_str__"1"  -3091.8578   7442.467 -0.415  0.678  -17700.000   11500.000          
229         KitchenAbvGr_str__"2"   3091.9163   7442.191  0.415  0.678  -11500.000   17700.000          
230               KitchenQual__Ex  12250.0000   4598.609  2.664  0.008    3222.825   21300.000        **
231               KitchenQual__Fa   6815.9285   5923.506  1.151  0.250   -4811.789   18400.000          
232               KitchenQual__Gd  -7953.1610   2864.050 -2.777  0.006  -13600.000   -2331.091        **
233               KitchenQual__TA -11110.0000   2783.603 -3.992  0.000  -16600.000   -5648.353       ***
234              LandContour__Bnk -19710.0000   4663.028 -4.226  0.000  -28900.000  -10600.000       ***
235              LandContour__HLS   8699.8297   5018.809  1.733  0.083   -1151.987   18600.000         .
236              LandContour__Low   7869.9105   6514.508  1.208  0.227   -4917.932   20700.000          
237              LandContour__Lvl   3137.3933   3509.407  0.894  0.372   -3751.498   10000.000          
238                LandSlope__Gtl   4614.4167   5781.516  0.798  0.425   -6734.578   16000.000          
239                LandSlope__Mod   5538.7763   5639.141  0.982  0.326   -5530.740   16600.000          
240                LandSlope__Sev -10150.0000   9607.560 -1.057  0.291  -29000.000    8706.304          
241                       LotArea      0.6113      0.153  3.985  0.000       0.310       0.912       ***
242             LotConfig__Corner   -757.2013   2534.947 -0.299  0.765   -5733.249    4218.846          
243            LotConfig__CulDSac   6540.6631   3437.058  1.903  0.057    -206.209   13300.000         .
244                LotConfig__FR2  -4392.0185   4487.580 -0.979  0.328  -13200.000    4417.007          
245             LotConfig__Inside  -1391.3848   2088.164 -0.666  0.505   -5490.407    2707.638          
246                   LotFrontage   -197.8470     62.644 -3.158  0.002    -320.815     -74.879        **
247                 LotShape__IR1   1053.4970   3729.306  0.282  0.778   -6267.053    8374.047          
248                 LotShape__IR2   4438.4363   5170.172  0.858  0.391   -5710.502   14600.000          
249                 LotShape__IR3  -9003.9892   9387.265 -0.959  0.338  -27400.000    9423.015          
250                 LotShape__Reg   3512.1145   3816.623  0.920  0.358   -3979.837   11000.000          
251                    MSSubClass   -226.1444    877.357 -0.258  0.797   -1948.378    1496.089          
252         MSSubClass_str__"120" -15290.0000  34500.000 -0.443  0.658  -83000.000   52400.000          
253         MSSubClass_str__"160"   6137.3503  69300.000  0.089  0.929 -130000.000  142000.000          
254         MSSubClass_str__"180"   5304.4473  87600.000  0.061  0.952 -167000.000  177000.000          
255         MSSubClass_str__"190"   9404.5974  40500.000  0.232  0.816  -70000.000   88900.000          
256          MSSubClass_str__"20" -13500.0000  57400.000 -0.235  0.814 -126000.000   99200.000          
257          MSSubClass_str__"30" -15310.0000  50400.000 -0.304  0.761 -114000.000   83600.000          
258          MSSubClass_str__"45"   3510.6140  43200.000  0.081  0.935  -81300.000   88300.000          
259          MSSubClass_str__"50"   1147.4145  32500.000  0.035  0.972  -62700.000   65000.000          
260          MSSubClass_str__"60"  -2337.5392  23500.000 -0.100  0.921  -48400.000   43700.000          
261          MSSubClass_str__"70"  12940.0000  17500.000  0.741  0.459  -21300.000   47200.000          
262          MSSubClass_str__"75"  27280.0000  27600.000  0.987  0.324  -27000.000   81500.000          
263          MSSubClass_str__"80"   7411.0776  16500.000  0.450  0.653  -24900.000   39700.000          
264          MSSubClass_str__"85" -17700.0000  17600.000 -1.005  0.315  -52300.000   16900.000          
265          MSSubClass_str__"90"  -9000.5928   7315.770 -1.230  0.219  -23400.000    5360.111          
266             MSZoning__C (all) -24950.0000  11400.000 -2.188  0.029  -47300.000   -2566.795         *
267                  MSZoning__FV   4174.3482   8360.192  0.499  0.618  -12200.000   20600.000          
268                  MSZoning__RH    814.8452   8842.482  0.092  0.927  -16500.000   18200.000          
269                  MSZoning__RL  10410.0000   4674.320  2.227  0.026    1231.802   19600.000         *
270                  MSZoning__RM   9549.7076   5628.541  1.697  0.090   -1499.001   20600.000         .
271                    MasVnrArea      5.1575      8.543  0.604  0.546     -11.613      21.928          
272            MasVnrType__BrkCmn  -2497.4653   7598.832 -0.329  0.742  -17400.000   12400.000          
273           MasVnrType__BrkFace   1201.8715   3148.718  0.382  0.703   -4978.996    7382.739          
274              MasVnrType__None   3443.5586   3315.077  1.039  0.299   -3063.869    9950.986          
275             MasVnrType__Stone  -2147.9062   4027.552 -0.533  0.594  -10100.000    5758.093          
276             MiscFeature__None -19310.0000  15100.000 -1.278  0.201  -49000.000   10300.000          
277             MiscFeature__Shed  19310.0000  15100.000  1.278  0.201  -10300.000   49000.000          
278                       MiscVal      3.7499      3.093  1.212  0.226      -2.321       9.821          
279              MiscVal_str__"0"  30290.0000  21000.000  1.439  0.151  -11000.000   71600.000          
280            MiscVal_str__"400" -14200.0000  12400.000 -1.145  0.253  -38600.000   10100.000          
281       MiscVal_str__Rare cases -16080.0000  11900.000 -1.355  0.176  -39400.000    7218.501          
282                        MoSold   -370.3038    382.192 -0.969  0.333   -1120.539     379.931          
283               MoSold_str__"1"   -140.0924   3870.086 -0.036  0.971   -7736.990    7456.805          
284              MoSold_str__"10"  -6137.7369   3622.897 -1.694  0.091  -13200.000     973.935         .
285              MoSold_str__"11"    975.0645   3561.219  0.274  0.784   -6015.534    7965.663          
286              MoSold_str__"12"   1233.6126   3682.759  0.335  0.738   -5995.567    8462.792          
287               MoSold_str__"2"  -2404.6106   4566.538 -0.527  0.599  -11400.000    6559.409          
288               MoSold_str__"3"   1148.4276   3400.245  0.338  0.736   -5526.183    7823.038          
289               MoSold_str__"4"   1333.9458   3079.686  0.433  0.665   -4711.414    7379.306          
290               MoSold_str__"5"    466.7314   2696.955  0.173  0.863   -4827.334    5760.797          
291               MoSold_str__"6"   -647.4570   2410.146 -0.269  0.788   -5378.523    4083.609          
292               MoSold_str__"7"   2434.4470   2469.463  0.986  0.325   -2413.057    7281.951          
293               MoSold_str__"8"   -516.6694   3301.023 -0.157  0.876   -6996.508    5963.169          
294               MoSold_str__"9"   2254.3959   4001.965  0.563  0.573   -5601.377   10100.000          
295         Neighborhood__Blmngtn  -3826.8162  11100.000 -0.344  0.731  -25700.000   18000.000          
296          Neighborhood__BrDale  -2358.5819  13400.000 -0.176  0.860  -28600.000   23900.000          
297         Neighborhood__BrkSide  -2615.4167   7465.791 -0.350  0.726  -17300.000   12000.000          
298         Neighborhood__ClearCr  -4279.7648   8244.046 -0.519  0.604  -20500.000   11900.000          
299         Neighborhood__CollgCr  -1554.8091   4397.474 -0.354  0.724  -10200.000    7077.340          
300         Neighborhood__Crawfor  12860.0000   7198.784  1.787  0.074   -1268.979   27000.000         .
301         Neighborhood__Edwards -18550.0000   4770.574 -3.889  0.000  -27900.000   -9186.292       ***
302         Neighborhood__Gilbert  -3120.4680   5725.753 -0.545  0.586  -14400.000    8119.065          
303          Neighborhood__IDOTRR  -9857.8751  10200.000 -0.968  0.333  -29800.000   10100.000          
304         Neighborhood__MeadowV   8832.6782  14100.000  0.626  0.532  -18900.000   36500.000          
305         Neighborhood__Mitchel -12520.0000   5925.014 -2.113  0.035  -24100.000    -886.133         *
306           Neighborhood__NAmes -10790.0000   4126.423 -2.614  0.009  -18900.000   -2685.108        **
307          Neighborhood__NWAmes  -8388.4586   5415.841 -1.549  0.122  -19000.000    2242.724          
308         Neighborhood__NoRidge  27530.0000   7374.038  3.733  0.000   13100.000   42000.000       ***
309         Neighborhood__NridgHt  27620.0000   6127.616  4.507  0.000   15600.000   39600.000       ***
310         Neighborhood__OldTown -12880.0000   7780.393 -1.656  0.098  -28200.000    2389.453         .
311      Neighborhood__Rare cases  -4479.3430  13900.000 -0.323  0.747  -31700.000   22800.000          
312           Neighborhood__SWISU -11060.0000   9985.532 -1.108  0.268  -30700.000    8540.764          
313          Neighborhood__Sawyer  -8603.3173   5402.972 -1.592  0.112  -19200.000    2002.604          
314         Neighborhood__SawyerW  -1448.2180   5449.716 -0.266  0.791  -12100.000    9249.461          
315         Neighborhood__Somerst  16880.0000   8033.437  2.101  0.036    1105.697   32600.000         *
316         Neighborhood__StoneBr  39090.0000   7806.995  5.006  0.000   23800.000   54400.000       ***
317          Neighborhood__Timber -12070.0000   6419.267 -1.880  0.061  -24700.000     535.206         .
318         Neighborhood__Veenker  -4404.9191  11400.000 -0.385  0.700  -26900.000   18100.000          
319                   OpenPorchSF     -3.2971     16.424 -0.201  0.841     -35.536      28.942          
320                   OverallCond   7712.1195   5687.559  1.356  0.175   -3452.440   18900.000          
321          OverallCond_str__"3"   7418.5480  17400.000  0.426  0.671  -26800.000   41600.000          
322          OverallCond_str__"4"  -3601.7436  12500.000 -0.289  0.772  -28000.000   20800.000          
323          OverallCond_str__"5"   2330.1291   6882.565  0.339  0.735  -11200.000   15800.000          
324          OverallCond_str__"6"   3693.6861   2984.103  1.238  0.216   -2164.045    9551.417          
325          OverallCond_str__"7"   2550.6378   6351.050  0.402  0.688   -9916.340   15000.000          
326          OverallCond_str__"8"  -6046.9613  12100.000 -0.500  0.617  -29800.000   17700.000          
327          OverallCond_str__"9"  -6344.2378  18700.000 -0.340  0.734  -43000.000   30300.000          
328                   OverallQual   6459.9570   6973.470  0.926  0.355   -7228.818   20100.000          
329         OverallQual_str__"10"  29980.0000  26100.000  1.150  0.250  -21200.000   81100.000          
330          OverallQual_str__"3"   9933.3339  25200.000  0.394  0.693  -39500.000   59400.000          
331          OverallQual_str__"4"  -8478.0481  17600.000 -0.480  0.631  -43100.000   26200.000          
332          OverallQual_str__"5" -14130.0000  11400.000 -1.234  0.218  -36600.000    8345.062          
333          OverallQual_str__"6" -15470.0000   5186.080 -2.983  0.003  -25600.000   -5289.570        **
334          OverallQual_str__"7" -15860.0000   4848.309 -3.271  0.001  -25400.000   -6339.594        **
335          OverallQual_str__"8"  -5974.5707  10800.000 -0.552  0.581  -27200.000   15300.000          
336          OverallQual_str__"9"  20000.0000  18300.000  1.090  0.276  -16000.000   56000.000          
337                 PavedDrive__N   4091.2429   3899.456  1.049  0.294   -3563.307   11700.000          
338                 PavedDrive__P  -5695.4005   4953.456 -1.150  0.251  -15400.000    4028.130          
339                 PavedDrive__Y   1604.2162   3219.673  0.498  0.618   -4715.935    7924.367          
340               RoofStyle__Flat  -8829.5348  10600.000 -0.830  0.407  -29700.000   12100.000          
341              RoofStyle__Gable  -2518.5274   4737.129 -0.532  0.595  -11800.000    6780.358          
342            RoofStyle__Gambrel  -2735.9434  12300.000 -0.222  0.824  -26900.000   21500.000          
343                RoofStyle__Hip   -934.7188   5101.934 -0.183  0.855  -10900.000    9080.271          
344         RoofStyle__Rare cases  15020.0000  11000.000  1.368  0.172   -6531.359   36600.000          
345        SaleCondition__Abnorml     97.8715   5951.463  0.016  0.987  -11600.000   11800.000          
346         SaleCondition__Alloca   9125.4332  11400.000  0.804  0.422  -13200.000   31400.000          
347         SaleCondition__Family  -1113.4516   7984.812 -0.139  0.889  -16800.000   14600.000          
348         SaleCondition__Normal   6107.3744   5180.921  1.179  0.239   -4062.666   16300.000          
349        SaleCondition__Partial -14220.0000  15200.000 -0.938  0.349  -44000.000   15500.000          
350                 SaleType__COD  -2932.0952   7287.739 -0.402  0.688  -17200.000   11400.000          
351                 SaleType__New  22050.0000  14400.000  1.534  0.125   -6163.613   50300.000          
352          SaleType__Rare cases -10430.0000   6984.289 -1.494  0.136  -24100.000    3278.935          
353                  SaleType__WD  -8682.6872   5621.273 -1.545  0.123  -19700.000    2351.754          
354                   ScreenPorch     64.2126     17.739  3.620  0.000      29.392      99.033       ***
355                  TotRmsAbvGrd   6627.1664   5714.690  1.160  0.247   -4590.649   17800.000          
356        TotRmsAbvGrd_str__"10"  19240.0000  14700.000  1.306  0.192   -9688.291   48200.000          
357        TotRmsAbvGrd_str__"11" -29400.0000  21400.000 -1.376  0.169  -71300.000   12500.000          
358        TotRmsAbvGrd_str__"12" -28120.0000  26000.000 -1.084  0.279  -79100.000   22800.000          
359         TotRmsAbvGrd_str__"3"  11610.0000  26000.000  0.446  0.655  -39400.000   62700.000          
360         TotRmsAbvGrd_str__"4"   7875.1932  19700.000  0.400  0.689  -30800.000   46500.000          
361         TotRmsAbvGrd_str__"5"   7030.5917  14200.000  0.497  0.620  -20800.000   34800.000          
362         TotRmsAbvGrd_str__"6"   6457.3213   8796.174  0.734  0.463  -10800.000   23700.000          
363         TotRmsAbvGrd_str__"7"   4572.4521   4141.743  1.104  0.270   -3557.703   12700.000          
364         TotRmsAbvGrd_str__"8"    656.6903   4550.841  0.144  0.885   -8276.515    9589.895          
365         TotRmsAbvGrd_str__"9"     80.7595   9467.051  0.009  0.993  -18500.000   18700.000          
366                   TotalBsmtSF      1.0617      5.659  0.188  0.851     -10.047      12.170          
367                    WoodDeckSF      8.3264      8.614  0.967  0.334      -8.583      25.236          
368                     YearBuilt    217.2946    131.382  1.654  0.099     -40.605     475.194         .
369                  YearRemodAdd     95.3702     88.610  1.076  0.282     -78.569     269.310          
370                        YrSold   -324.0238    160.548 -2.018  0.044    -639.176      -8.871         *
371            YrSold_str__"2006"    668.6123   1982.587  0.337  0.736   -3223.164    4560.388          
372            YrSold_str__"2007"   -455.4234   1931.419 -0.236  0.814   -4246.759    3335.912          
373            YrSold_str__"2008"   -226.2129   1942.607 -0.116  0.907   -4039.510    3587.084          
374            YrSold_str__"2009"   -414.1245   1884.067 -0.220  0.826   -4112.508    3284.259          
375            YrSold_str__"2010"    427.2069   2546.820  0.168  0.867   -4572.147    5426.561          

-------------------------

 --- Model statistic --- 

R-squared         : 0.921
Adj. R-squared    : 0.89
F-statistic       : 30
Prob (F-statistic): 8.685953892976382e-302
No. Observations  : 1095
AIC               : 25722
Df Residuals      : 788
BIC               : 27257
RMSE (test)       : 29324

-------------------------

Maximum correlation between Reseduals and any data columns is 1.397018510768434e-12, with columns <LotArea>
Mean of train reseduals: 4.819381692910303e-08

 ------------------------------------- Random Forest -------------------------------------


-------------------------

RF model peramters:

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': True,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

-------------------------

-------------------------

--- Model statistic ---
R^2 (test) : 0.8856059270305071
R^2 (train): 0.9792957005523424
RMSE (test): 23653
oob score  : 0.8495393135794702

-------------------------

Maximum correlation between Reseduals and any data columns is 0.323711981173656, with columns <Neighborhood__Crawfor>